# Packages ----------------------------------------------------------------

dir <- "not included in supp info"

# Packages attached by this script, in attach order (later packages mask
# same-named functions from earlier ones, so the order is kept as written).
# NOTE(review): mpi, bankRun, ggrba, arphit and rbatools look like internal
# packages; install.packages() can only fetch them if the configured
# repositories serve them.
packagesNeeded <- c("tidyverse",
                    "data.table",
                    "mpi",
                    "lubridate",
                    "bankRun",
                    "ggrba",
                    "arphit",
                    "rbatools",
                    "tseries",
                    "janitor",
                    "lmtest",
                    "sandwich",
                    "minpack.lm",
                    "msm",
                    "ivreg",
                    "fixest",
                    "ggtext")

# Use base R only for the bootstrap check: going through dplyr/tibble here
# would fail exactly when the tidyverse is the package that is missing.
packagesMissing <- setdiff(packagesNeeded, rownames(installed.packages()))

if (length(packagesMissing) > 0) {
  install.packages(packagesMissing)
}

for (i in packagesNeeded) {
  library(i, character.only = TRUE)
}

# Functions ---------------------------------------------------------------

# Build a daily SOFIA series with spreads to the ES rate.
#
# Three sources are stacked and tagged in the `Type` column:
#   "asx"     - the published ASX SOFIA beta spreadsheet (downloaded here),
#   "algo"    - a reconstruction from a local ASX trade file, restricted to
#               trades starting between 2020-11-21 and 2021-11-21,
#   "RepoMod" - a reconstruction from fetchASXRepoData() for 2021-10-21 to
#               2021-12-31, bridging the gap between "algo" and "asx".
#
# Args:
#   startDate, endDate: bounds applied to the published ("asx") series only;
#     the two reconstructed series use the fixed windows above. If endDate is
#     NULL it defaults to the latest published date; if startDate is NULL it
#     defaults to endDate.
#
# Returns:
#   A tibble of distinct rows with columns Date, SOFIA, SOFIAVolume (divided
#   by 1e9), SOFIAspread, Type, SOFIAMedian, SOFIATrades, SOFIAMin, SOFIAMax,
#   SOFIAspreadMedian, SOFIAspreadMin, SOFIAspreadMax. Spreads are
#   (rate - ESRate) * 100.
#
# Side effects: sets the proxy via rbatools::set_proxy() and downloads the
# spreadsheet into tempdir().
fetchSOFIA <- function(startDate = NULL,
                       endDate = NULL){
  
  # Only convert dates that were actually supplied, so the NULL defaults
  # handled further down stay NULL until then.
  # NOTE(review): asDate() is defined elsewhere; its behaviour on NULL is not
  # visible here, which is why it is bypassed for NULL.
  if(!is.null(startDate)){startDate <- asDate(startDate)}
  if(!is.null(endDate)){endDate <- asDate(endDate)}
  
  saveDir <- file.path(tempdir(), "sofiaBeta.xlsx")
  
  rbatools::set_proxy()
  
  utils::download.file("https://www.asx.com.au/content/dam/asx/connectivity-and-data/sofia-beta-version.xlsx",
                       saveDir,
                       mode = "wb")
  
  # Published series: Excel serial dates are converted with the Excel origin
  SOFIAData <- readxl::read_excel(saveDir, skip = 3) %>%
    tidyr::drop_na() %>%
    dplyr::mutate(Date = as.Date(as.numeric(.data$Date), origin = "1899-12-30")) %>%
    dplyr::select("Date" = 'Date', "SOFIA" = 'SOFIA Overnight (%) VWAP',
                  "SOFIAVolume" = 'Eligible Volume ($)',
                  "SOFIAMedian" = "SOFIA Overnight  (%) Volume Weighted Median",
                  "SOFIATrades" = "Eligible Trades",
                  "SOFIAMin" = "Minimum Yield",
                  "SOFIAMax" = "Maximum Yield") %>%
    dplyr::mutate(SOFIAVolume = .data$SOFIAVolume/1e9)
  
  if(is.null(endDate)){endDate <- max(SOFIAData$Date)}
  if(is.null(startDate)){startDate <- endDate}
  
  standardDateGuardClauses(startDate, endDate)
  
  # ungroup() before select() so that any grouping columns carried by the
  # fetch result are dropped rather than silently retained
  cashRateDaily <- fetchCashRate(startDate = "2019-01-01") %>%
    dplyr::ungroup() %>%
    dplyr::select("date", "publishedCashRate", "ESRate") %>%
    dplyr::mutate(cashRateSpread = publishedCashRate - ESRate) 
  
  SOFIAFiltered <- SOFIAData %>%
    dplyr::filter(dplyr::between(.data$Date, .env$startDate, .env$endDate))
  
  asxData2020 <- data.table::fread("not included in supp info",
                                   header = TRUE) 
  
  # "algo" reconstruction: per start date, rank trades from the highest rate
  # down and keep those whose cumulative share of cash is at most 75 per cent.
  # NOTE(review): startDate/endDate inside mutate() are expected to resolve to
  # columns of the file; if either column were absent the function arguments
  # of the same name would be used silently - confirm the file schema.
  SOFIAAlgo <- asxData2020 %>% 
    as_tibble() %>% 
    mutate(startDate = as.Date(startDate, tryFormats = "%d/%m/%Y"), 
           endDate = as.Date(endDate, tryFormats = "%d/%m/%Y"), 
           issuerType = case_when(InstrumentSubClassFull %in% c("Treasury Bond", 
                                                                "Treasury Indexed Bond",
                                                                "Treasury Note") ~ "AGS",
                                  InstrumentSubClassFull %in% c("Semi Govt Bond") ~ "SGS",
                                  TRUE ~ "Other")) %>%
    dplyr::filter(term == 1,
                  issuerType %in% c("AGS", "SGS"), 
                  interestRate < 1, 
                  !grepl("RBAA", cashReceiverCode), 
                  !grepl("RBAA", cashSenderCode), 
                  startDate >= as.Date("2020-11-21"), 
                  startDate <= as.Date("2021-11-21")) %>% 
    dplyr::group_by(startDate) %>% 
    dplyr::arrange(startDate, desc(interestRate)) %>% 
    dplyr::mutate(csum = cumsum(cashFirstLeg), 
                  total = sum(cashFirstLeg), 
                  share = csum/total*100) %>% 
    dplyr::filter(share <= 75) %>% 
    dplyr::group_by(startDate) %>% 
    dplyr::summarise(SOFIA = weighted.mean(interestRate, cashFirstLeg), 
                     SOFIAVolume = sum(cashFirstLeg)/1e9, 
                     SOFIAMin = min(interestRate), 
                     SOFIAMax = max(interestRate), 
                     SOFIATrades = n(), 
                     Type = "algo"
    ) %>%
    ungroup() %>% 
    rename("Date" = "startDate")
  
  # To bridge the gap between algo and ASX 
  
  rawASXData <- fetchASXRepoData(startDate = "2021-10-21", 
                                 endDate = "2021-12-31")
  
  # "RepoMod" reconstruction: per trade date, rank trades from the lowest
  # rate up and keep those at or beyond 25 per cent of cumulative volume.
  SOFIARaw <- rawASXData %>%
    mutate(collateralDummy = case_when(issuerType %in% c("Govt", "Semi-Govt") ~ 1,
                                       TRUE ~ 0)) %>%
    filter(collateralDummy == 1, 
           originalTerm == 1,
           tradeDate == settleDate,
           buyerCode != sellerCode, 
           !buyerCode %in% c("RBAA20", "RBAA23", "RBAA29"),
           !sellerCode %in% c("RBAA20", "RBAA23", "RBAA29")) %>%
    group_by(tradeDate) %>%
    mutate(share = leg1CV/sum(leg1CV)*100) %>%
    arrange(tradeDate, repoRate, share) %>% 
    mutate(cumVolume = cumsum(share)) %>% 
    filter(cumVolume >= 25) %>% 
    arrange(tradeDate) %>%
    dplyr::summarise(SOFIAMedian = spatstat.geom::weighted.quantile(x = repoRate,
                                                                    w = share,
                                                                    probs = 0.5,
                                                                    type = 1),
                     SOFIA = round(weighted.mean(x = repoRate, w = share), 4),
                     SOFIAMin = min(repoRate), 
                     SOFIAMax = max(repoRate), 
                     SOFIATrades = n(),
                     SOFIAVolume = sum(leg1CV, na.rm = TRUE)/1e9) %>%
    dplyr::ungroup() %>% 
    dplyr::rename("Date" = "tradeDate") %>% 
    dplyr::mutate(Type = "RepoMod")
  
  # Stack the three sources and express each rate as a spread to the ES rate
  sofiaFinal <- SOFIAFiltered %>%
    dplyr::mutate(Type = "asx") %>%
    dplyr::bind_rows(SOFIAAlgo) %>%
    dplyr::bind_rows(SOFIARaw) %>%
    dplyr::arrange(.data$Date) %>% 
    dplyr::left_join(cashRateDaily, by = c("Date" = "date")) %>%
    dplyr::mutate(SOFIAspread = (.data$SOFIA - .data$ESRate)*100,
                  SOFIAspreadMedian = (.data$SOFIAMedian - .data$ESRate)*100,
                  SOFIAspreadMin = (.data$SOFIAMin - .data$ESRate)*100,
                  SOFIAspreadMax = (.data$SOFIAMax - .data$ESRate)*100) %>%
    dplyr::select(c("Date", "SOFIA", "SOFIAVolume", "SOFIAspread", "Type", "SOFIAMedian", "SOFIATrades", "SOFIAMin", "SOFIAMax",
                    "SOFIAspreadMedian", "SOFIAspreadMin", "SOFIAspreadMax")) %>% 
    dplyr::distinct()
  
  return(sofiaFinal)
}

# Data --------------------------------------------------------------

## Cash rate ---------------------------------------------------------------

# Fetch the cash rate series once and derive both frequencies from it
# (previously the identical query was issued twice).
cashRateRaw <- fetchCashRate(startDate = "2019-01-01")

# Daily series: ungroup() first so any grouping columns carried by the fetch
# result are dropped by the select().
cashRateDaily <- cashRateRaw %>%
  dplyr::ungroup() %>%
  dplyr::select("date", "publishedCashRate", "ESRate") %>%
  dplyr::mutate(cashRateSpread = publishedCashRate - ESRate) 

# Monthly series: the last available observation in each month, keyed by the
# month-end date returned by end_of_month().
cashRate <- cashRateDaily %>%
  dplyr::mutate(month = end_of_month(date)) %>%
  dplyr::group_by(month) %>%
  dplyr::filter(date == max(date)) %>%
  dplyr::ungroup() %>%
  dplyr::select("month", "publishedCashRate", "ESRate", "cashRateSpread")

## Repo rate ---------------------------------------------------------------

# Daily SOFIA series (all source types) from 2020 to today
repoRateSOFIA <- fetchSOFIA(startDate = "2020-01-01", 
                            endDate = Sys.Date())

# Month-end SOFIA spreads: keep the rows on the last available date of each
# month. The result is explicitly ungrouped so downstream verbs do not
# inherit the by-month grouping.
# NOTE(review): fetchSOFIA() stacks several `Type`s whose date windows
# overlap, so a month-end date may appear more than once here and fan out
# the later join on month - confirm whether one Type should be preferred.
repoRateSOFIAFinal <- repoRateSOFIA %>% 
  mutate(month = rbatools::end_of_month(Date)) %>% 
  group_by(month) %>% 
  filter(Date == max(Date)) %>% 
  ungroup() %>% 
  select(month, SOFIAspread, SOFIAspreadMedian)

## OMO and ES balances -------------------------------------------------------------

# Daily ES balances from 2019, excluding rows with no LatePayments value
ESBalDaily <- mpi::fetchESBalances(startDate = "2019-01-01",
                                   endDate = Sys.Date()) |>
  dplyr::filter(!is.na(LatePayments))

# Month-end ES balances (surplus and end-of-day total), divided by 1e3
ESBal <- ESBalDaily |>
  dplyr::select(Date,
                ESBal = ESAbalanceSurplus,
                ESBalTotal = ESAbalanceEOD) |>
  dplyr::mutate(month = end_of_month(Date)) |>
  dplyr::group_by(month) |>
  dplyr::filter(Date == max(Date)) |>
  dplyr::ungroup() |>
  dplyr::select(-Date) |>
  dplyr::mutate(ESBal = ESBal / 1e3,
                ESBalTotal = ESBalTotal / 1e3)

# Outstanding OMO trades from 2019 to today
OMOs <- fetchOMOTrades(startDate = "2019-01-01",
                       endDate = Sys.Date(),
                       mode = "outstanding")

# Total outstanding OMO cash value per as-at date, divided by 1000
omoOutstandingVolume <- OMOs |>
  dplyr::group_by(asAt) |>
  dplyr::summarise(OMO = sum(cashValue) / 1000) |>
  dplyr::ungroup()

## Overnight NCOs ----------------------------------------------------------

# This spreadsheet is an output from DM.MO/Laurie/Breaking up NCOs/

# Read the overnight NCO / funding extract, parse its date column and drop
# the `X` column.
overnightNCOsPath <- paste0(dir, "overnightNCOsAndFunding.csv")

overnightNCOsAndFunding <- read.csv(overnightNCOsPath) |>
  as_tibble() |>
  mutate(date = as.Date(date)) |>
  select(-X)

## Deposit data ----------------------------------------------------

# At-call deposits per period: non-interest bearing + other at-call + offset
atCallDep <- readxl::read_excel(path = "not included in supp info.xlsx",
                                sheet = "1 | Total",
                                skip = 8) |>
  dplyr::transmute(period = as.Date(period),
                   atCall = `Non-interest bearing` + `Other at-call` + Offset)

# APRA deposits, earlier publication ("Table 1", no rows skipped)
depAPRAPre2019 <- readxl::read_excel("not included in supp info.xlsx", 
                                     sheet = "Table 1", 
                                     skip = 0)

# Total deposits per period (sum over institutions and the five deposit
# categories, divided by 1e3). The latest period in this file is dropped.
depPre2019Final <- depAPRAPre2019 %>% 
  select(Period, 
         `Institution Name`, 
         `Deposits from non-financial corporations`, 
         `Deposits from financial corporations`, 
         `Deposits from general government`, 
         `Deposits from households`, 
         `Deposits from community service organisations and non-profit institutions` 
  ) %>% 
  mutate(totalDep = `Deposits from non-financial corporations` +
           `Deposits from financial corporations` +
           `Deposits from general government` +
           `Deposits from households` +
           `Deposits from community service organisations and non-profit institutions`) %>% 
  arrange(desc(Period)) %>% 
  filter(Period != max(Period)) %>% 
  group_by(Period) %>% 
  summarise(totalDep = sum(totalDep)/1e3) %>% 
  ungroup() %>% 
  mutate(Period = as.Date(Period))

# APRA deposits, later publication ("Table 1", first row skipped)
depAPRAPost2019 <- readxl::read_excel("not included in supp info.xlsx", 
                                      sheet = "Table 1", 
                                      skip = 1)

# Total deposits per period from 2019-06-30 onwards.
# Period is converted to Date BEFORE the cutoff filter: read_excel typically
# returns date cells as POSIXct, and comparing POSIXct with a Date compares
# seconds against days, so the cutoff would otherwise never exclude a row.
depPost2019Final <- depAPRAPost2019 %>% 
  select(Period, 
         `Institution Name`,
         `Deposits by non-financial businesses`, 
         `Deposits by financial institutions`, 
         `Deposits by general government`, 
         `Deposits by households`, 
         `Deposits by community service organisations`) %>% 
  mutate(Period = as.Date(Period),
         totalDep = `Deposits by non-financial businesses` + 
           `Deposits by financial institutions` + 
           `Deposits by general government` +
           `Deposits by households` +
           `Deposits by community service organisations`) %>% 
  arrange(Period) %>% 
  filter(Period >= as.Date("2019-06-30"))  %>% 
  group_by(Period) %>% 
  summarise(totalDep = sum(totalDep)/1e3) %>% 
  ungroup()

# Splice the two publications into one series; columns are renamed by name
# rather than by position.
depositsLongTimeSeries <- depPost2019Final %>% 
  bind_rows(depPre2019Final) %>% 
  arrange(Period) %>% 
  rename(period = Period, TotalDepositsAPRA = totalDep)

## Securities lending ------------------------------------------------------

# Outstanding RBA securities lending trades from 2020 to today
securitiesLendingRBA <- mpi::getSecLendTradesOutstanding(startDate = "2020-01-01", 
                                                         endDate = Sys.Date())

# Keep one-day "Sell" trades and label the collateral issuer
secLendFiltered <- securitiesLendingRBA %>% 
  dplyr::filter(buySell == "Sell", 
                term == 1) %>% 
  dplyr::mutate(securityType = dplyr::case_when(issuerCode == "AUSC ISSUER" ~ "AGS", 
                                                TRUE ~ "Semis"))

# Daily outstanding amount (sign flipped, divided by 1e3), then the value on
# the latest as-at date within each month.
# dplyr::last() is namespaced and ordered explicitly: a bare last() resolves
# to whichever attached package defines it last, and `na.rm` is not one of
# dplyr::last()'s arguments. The daily totals are already NA-free because
# they are built with sum(..., na.rm = TRUE).
finalRBASLOutstanding <- secLendFiltered %>%
  dplyr::group_by(asAt) %>% 
  dplyr::summarise(outstanding = -sum(value, na.rm = TRUE)/1e3) %>% 
  dplyr::ungroup() %>% 
  dplyr::mutate(month = rbatools::end_of_month(asAt)) %>%
  dplyr::group_by(month) %>% 
  dplyr::summarise(RBASLOutstanding = dplyr::last(outstanding, order_by = asAt)) %>% 
  dplyr::ungroup() 

## Relative value demand ---------------------------------------------------

bobSpreadRaw <- readxl::read_excel("not included in supp info..xlsx")
assetSpreadRaw <- readxl::read_excel("not included in supp info..xlsx")

# Tenors (in years) to extract from the wide spread sheets
tenorYears <- as.character(c(1, 2, 3, 5, 7, 10))

# Pull the columns whose names contain "<years>y" (expected to be exactly a
# date column and a value column), relabel them and tag the tenor.
extractTenor <- function(rawSpreads, years, valueName) {
  rawSpreads %>%
    dplyr::select(dplyr::contains(paste0(years, "y"))) %>%
    stats::setNames(c("Date", valueName)) %>%
    dplyr::mutate(tenor = .env$years)
}

bobList <- stats::setNames(
  lapply(tenorYears, function(years) extractTenor(bobSpreadRaw, years, "BOB")),
  paste0("tenor", tenorYears)
)

aswList <- stats::setNames(
  lapply(tenorYears, function(years) extractTenor(assetSpreadRaw, years, "ASW")),
  paste0("tenor", tenorYears)
)

# Long tables of distinct (Date, tenor) observations
bobSpreads <- dplyr::bind_rows(bobList) %>%
  dplyr::mutate(Date = as.Date(Date)) %>%
  dplyr::distinct()

aswSpreads <- dplyr::bind_rows(aswList) %>%
  dplyr::mutate(Date = as.Date(Date)) %>%
  dplyr::distinct()

# Keep only date/tenor pairs where both spreads are available
spreadsFinal <- bobSpreads %>%
  dplyr::full_join(aswSpreads, by = c("Date", "tenor")) %>%
  tidyr::drop_na()

relativeValue <- spreadsFinal %>%
  dplyr::left_join(cashRateDaily, by = c("Date" = "date")) %>%
  dplyr::mutate(AGS = publishedCashRate + BOB/100 + ASW/100 - ESRate,
                ESRatePlusBankLevy = ESRate + 0.06)

# Cross-tenor average spreads per day, then the last available day per month
BOBASWFinal <- relativeValue %>%
  tidyr::drop_na() %>%
  dplyr::group_by(Date) %>%
  dplyr::summarise(ASW = mean(ASW),
                   BOB = mean(BOB)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(month = rbatools::end_of_month(Date)) %>%
  dplyr::group_by(month) %>%
  dplyr::filter(Date == max(Date)) %>%
  dplyr::ungroup() %>%
  dplyr::select(month, ASW, BOB)

## APRA SOFIA --------------------------------------------------------------

# Connection settings and reporting window for the APRA form "A" extracts
database <- "FIDO_Prod"
server <- NULL

startDate <- "2019-09-01"
endDate <- "2024-09-30"

# One fetch per instrument, stored as rawDataInput$A$<instrument>
apraInstruments <- c("repo",
                     "reverseRepo",
                     "repoCollateral",
                     "reverseRepoCollateral")

rawDataInput <- list(
  A = lapply(stats::setNames(apraInstruments, apraInstruments),
             function(instrumentName) {
               fetchAPRAData(startDate = startDate,
                             endDate = endDate,
                             form = "A",
                             instrument = instrumentName,
                             database = database,
                             server = server)
             })
)

# Trade-level fields kept from both the repo and reverse repo extracts
apraTradeFields <- c("id",
                     "Dim_Entity_Key",
                     "asAt",
                     "tradeDate",
                     "counterpartySector",
                     "counterpartyGroup",
                     "counterpartyJurisdiction",
                     "currency",
                     "originalMaturity",
                     "residMat",
                     "rate")

# Keep the trade-level fields and derive the maturity date implied by the
# reporting date plus residual maturity.
prepareAPRATrades <- function(trades) {
  trades %>%
    dplyr::select(dplyr::all_of(apraTradeFields)) %>%
    dplyr::mutate(currentMatDate = .data$asAt + .data$residMat)
}

# Attach trade details to collateral rows, keep AUD trades that are not with
# the RBA and not backed by equity/other collateral, flag AGS/SGS collateral
# as HQLA, and record per trade id the share of collateral rows that are HQLA.
attachAPRACollateral <- function(collateral, trades) {
  collateral %>%
    dplyr::left_join(trades, by = c("id", "asAt", "Dim_Entity_Key")) %>%
    dplyr::filter(.data$counterpartySector != "RBA",
                  !.data$collType %in% c("Equity", "Other"),
                  .data$currency == "AUD") %>%
    dplyr::mutate(collateralGroup = dplyr::if_else(grepl("AGS|SGS", .data$collType),
                                                   "HQLA",
                                                   "Non-HQLA"),
                  collateralGroupNo = as.numeric(.data$collateralGroup == "HQLA")) %>%
    dplyr::group_by(.data$id) %>%
    dplyr::mutate(averageColl = mean(.data$collateralGroupNo)) %>%
    dplyr::ungroup()
}

repo <- prepareAPRATrades(rawDataInput$A$repo)

repoARates <- attachAPRACollateral(rawDataInput$A$repoCollateral, repo)

reverseRepo <- prepareAPRATrades(rawDataInput$A$reverseRepo)

reverseRepoARates <- attachAPRACollateral(rawDataInput$A$reverseRepoCollateral, reverseRepo)

# Stack both sides (tagged in `type`). "GC1" = every collateral row of the
# trade is HQLA; "GC2" = anything else (mixed or entirely non-HQLA).
allRepoTrades <- dplyr::bind_rows(list(repo = repoARates,
                                       reverseRepo = reverseRepoARates),
                                  .id = "type") %>%
  dplyr::mutate(repoType = dplyr::if_else(.data$averageColl == 1,
                                          "GC1",
                                          "GC2",
                                          missing = "GC2")) %>%
  # Rates are computed on AUD collateral with a non-zero market value only
  dplyr::filter(.data$collMV != 0,
                .data$collCurrency == "AUD") %>%
  # Halve trades with "Banks and RFCs" counterparties to avoid double counting
  dplyr::mutate(collMV = dplyr::case_when(.data$counterpartyGroup == "Banks and RFCs" ~ collMV/2,
                                          TRUE ~ .data$collMV)) %>%
  # Bucket by days between asAt and currentMatDate; bucketNames has one
  # label per interval between consecutive thresholdDates.
  APRAMaturityBuckets(reportDate = "asAt",
                      dateCol = "currentMatDate",
                      bucketCol = "tenor",
                      bucketPosition = "end",
                      thresholdDates = c(-9999, 0, 2, 15, 45, 75, 105, 1e6),
                      bucketNames = c("Undefined (open repo)",
                                      "<= 1 day",
                                      "> 1 day to <= 2 weeks",
                                      "1 month",
                                      "2 month",
                                      "3 month",
                                      "> 3 month"))

# Trades reported on their own trade date, backed by HQLA collateral with a
# positive market value, in the "<= 1 day" tenor bucket. Restricting to
# asAt == tradeDate avoids counting the same trade at several as-at dates.
repoRatesByEntityCurrent <- allRepoTrades %>%
  dplyr::group_by(.data$id) %>%
  dplyr::filter(.data$asAt == .data$tradeDate,
                .data$collMV > 0,
                .data$collateralGroup == "HQLA",
                .data$tenor == "<= 1 day")

# Per trade date: rank trades by rate, drop those below 25 per cent of
# cumulative volume, then take the volume-weighted mean and median of the
# remaining rates and express both as spreads to ESRate (x100).
APRASOFIACalc <- repoRatesByEntityCurrent %>%
  dplyr::group_by(.data$tradeDate) %>%
  dplyr::mutate(shareVol = collMV / sum(collMV) * 100) %>%
  dplyr::arrange(tradeDate, rate, shareVol) %>%
  dplyr::mutate(cumulativeQ = cumsum(shareVol)) %>%
  dplyr::filter(cumulativeQ >= 25) %>%
  dplyr::summarise(
    APRASOFIAVWAP = weighted.mean(x = .data$rate, w = .data$shareVol),
    APRASOFIA = spatstat.geom::weighted.quantile(x = .data$rate,
                                                 w = .data$shareVol,
                                                 probs = 0.5,
                                                 type = 1)
  ) %>%
  dplyr::ungroup() %>%
  dplyr::left_join(cashRate, by = c("tradeDate" = "month")) %>%
  dplyr::mutate(APRASOFIASpread = (APRASOFIA - ESRate) * 100,
                APRASOFIAVWAPSpread = (APRASOFIAVWAP - ESRate) * 100) %>%
  dplyr::select(tradeDate,
                APRASOFIA,
                APRASOFIAVWAP,
                APRASOFIAVWAPSpread,
                APRASOFIASpread)

bondOIS <- bankRun::sql_run("not included in supp info",
                            database = "MARKET_DATA_PROD")

# Month-end 3-year and 10-year bond-minus-OIS differences, built from the
# active rows of the query result (one column per instrument code).
bondOISFinal <- bondOIS |>
  dplyr::filter(IsActive == TRUE) |>
  dplyr::select(BusinessDate, InstrumentCode, MeasureValueNum) |>
  tidyr::pivot_wider(names_from = InstrumentCode,
                     values_from = MeasureValueNum) |>
  dplyr::mutate(bondOIS3year = GACGB3 - ADSO3,
                bondOIS10year = GACGB10 - ADSO10) |>
  dplyr::select(BusinessDate, dplyr::contains("bondOIS")) |>
  tidyr::drop_na() |>
  dplyr::mutate(month = rbatools::end_of_month(BusinessDate)) |>
  dplyr::group_by(month) |>
  dplyr::filter(BusinessDate == max(BusinessDate)) |>
  dplyr::ungroup() |>
  dplyr::select(month, dplyr::contains("bondOIS")) |>
  dplyr::arrange(month)

## APRA Specials calc ------------------------------------------------------

# RBA securities lending expressed in the same shape as the private trades:
# volume scaled by 1e9 and a rate set 0.2 below the month's ESRate.
RBASLToBind <- finalRBASLOutstanding |>
  dplyr::left_join(cashRate, by = "month") |>
  dplyr::mutate(collMV = RBASLOutstanding * 1e9,
                rate = ESRate - 0.2,
                marketType = "RBA") |>
  dplyr::select(tradeDate = month, collMV, rate, marketType)

# Per trade date: cumulative volume share (trades ranked by rate) reached by
# trades whose rate is more than 10 (x100 units) below APRASOFIA.
APRARepoSpecialsUnder10 <- repoRatesByEntityCurrent |>
  dplyr::mutate(marketType = "Private") |>
  dplyr::bind_rows(RBASLToBind) |>
  dplyr::group_by(.data$tradeDate) |>
  dplyr::mutate(shareVol = collMV / sum(collMV) * 100) |>
  dplyr::arrange(tradeDate, rate, shareVol) |>
  dplyr::mutate(cumulativeQ = cumsum(shareVol)) |>
  dplyr::left_join(APRASOFIACalc, by = "tradeDate") |>
  dplyr::mutate(specialsSpread = (APRASOFIA - rate) * 100) |>
  dplyr::select(tradeDate, rate, APRASOFIA, specialsSpread, collMV,
                dplyr::everything()) |>
  dplyr::filter(specialsSpread > 10) |>
  dplyr::group_by(tradeDate) |>
  dplyr::summarise(APRAVolumeGCMinus10 = max(cumulativeQ)) |>
  dplyr::ungroup()

## Balance sheet -----------------------------------------------------------

# Balance sheet extract, read from the fixed cell range A1:AI2000.
# The former `skip = 10` argument has been removed: read_excel() documents
# that `range` takes precedence over `skip`, so it never had any effect.
# NOTE(review): if the first ten rows were meant to be skipped, the range
# itself should start at row 11 - confirm against the workbook.
balanceSheet <- readxl::read_excel("not included in supp info.xlsx", 
                                   sheet = "dd-FA Balance Sheet", 
                                   range = "A1:AI2000")

# Month-end foreign deposits: column 1 holds the date and column 35 the
# value (both have auto-generated names); the value is divided by 1e9.
foreignDep <- balanceSheet %>% 
  select(Date = `...1`, foreignDep = `...35`) %>% 
  drop_na() %>% 
  mutate(Date = as.Date(Date),
         month = rbatools::end_of_month(Date),
         foreignDep = as.numeric(foreignDep)/1e9) %>% 
  group_by(month) %>% 
  filter(Date == max(Date)) %>% 
  ungroup() %>% 
  select(month, foreignDep)

## Uninsured deposits ------------------------------------------------------

# ARF_910_0 items and the short labels they are recoded to
fcs910Items <- c(BSL22704 = "num_acc",
                 BSL22710 = "num_fcs",
                 BSL22705 = "val_acc",
                 BSL22711 = "val_fcs")

fcs_910 <-
  sqlfido::fido_data(
    form_code = "ARF_910_0",
    form_item = names(fcs910Items),
    caching = TRUE
  ) |>
  dplyr::transmute(
    period,
    institution_code,
    form_item = rbatools::from_to(form_item,
                                  names(fcs910Items),
                                  unname(fcs910Items)),
    value
  )

# Per period: val_acc minus val_fcs ("UninsuredDepo") and val_acc
# ("totalDep"), both divided by 1e9, and their ratio. Two year-end periods
# are excluded.
fcs_shares <- fcs_910 |>
  tidyr::pivot_wider(names_from = form_item, values_from = value) |>
  dplyr::group_by(period) |>
  dplyr::summarise(
    UninsuredDepo = (sum(val_acc) - sum(val_fcs)) / 1e9,
    totalDep = sum(val_acc) / 1e9,
    .groups = "drop"
  ) |>
  dplyr::mutate(share = UninsuredDepo / totalDep) |>
  dplyr::filter(!period %in% as.Date(c("2019-12-31", "2018-12-31")))

# Quick visual check of the share over time
ggrba(fcs_shares) +
  geom_line(aes(x = period, y = share))

# ARF_747_0A items: the "total" item versus the two items labelled
# "250500" and "500plus", which are summed as "Uninsured". Values are
# totalled per period and divided by 1e9; Insured = Total - Uninsured.
fcs_base <-
  sqlfido::fido_data(
    form_code = "ARF_747_0A",
    form_item = c("BSL22517", "BSL22520", "BSL22496"),
    caching = TRUE
  ) |>
  dplyr::transmute(
    period,
    institution_code,
    form_item = rbatools::from_to(
      form_item,
      c("BSL22517", "BSL22520", "BSL22496"),
      c("250500", "500plus", "total")),
    value
  ) %>%
  dplyr::mutate(group = dplyr::if_else(form_item == "total",
                                       "Total",
                                       "Uninsured",
                                       missing = "Uninsured")) %>%
  dplyr::group_by(period, group) %>%
  dplyr::summarise(totalHH = sum(value)/1e9) %>%
  dplyr::ungroup() %>%
  tidyr::pivot_wider(names_from = group, values_from = totalHH) %>%
  dplyr::transmute(asAt = period,
                   Uninsured,
                   Insured = Total - Uninsured)


fcs_nonHH <- bankRun::sql_run("not included in supp info.sql",
                              database = "FIDO_Prod",
                              startDate = as.Date("2020-01-01"),
                              endDate = Sys.Date())

# Balance bands (values of OI14294) treated as uninsured.
# NOTE(review): there is no band between $500,000 and $1,000,000 in this
# list; if the data contains one it is classified as "Insured" - confirm.
uninsuredBalanceBands <- c("Balance > $250,000 to <= $500,000",
                           "Balance > $10,000,000 to <= $100,000,000",
                           "Balance > $1,000,000 to <= $10,000,000",
                           "Balance > $100,000,000")

# Reshape the attribute/value rows to one column per attribute, classify
# each row by balance band and total BSL22523 (divided by 1e9) per as-at
# date and group.
finalNonHH <- fcs_nonHH %>%
  # Prefer the text value; fall back to the numeric value when text is NA
  dplyr::mutate(value = dplyr::if_else(is.na(.data$Value_Text),
                                       as.character(.data$Value_Number),
                                       as.character(.data$Value_Text))) %>%
  dplyr::select("Dim_Form_Key",
                "Dim_Entity_Key",
                "Source_Sequence_Number",
                "RBA_Entity_Shortname",
                "Internal_Entity_Name",
                "Ownership",
                "Entity_ABN",
                "asAt",
                "Attribute_Name",
                "value",
                "complexAttribute",
                "complexAttributeValue") %>%
  tidyr::pivot_wider(names_from = "Attribute_Name", values_from = "value") %>%
  tidyr::pivot_wider(names_from = "complexAttribute", values_from = "complexAttributeValue") %>%
  dplyr::select(asAt,
                RBA_Entity_Shortname,
                OI14293,
                OI14294,
                BSL22522,
                BSL22523,
                BSL22524) %>%
  dplyr::mutate(group = dplyr::if_else(OI14294 %in% uninsuredBalanceBands,
                                       "Uninsured",
                                       "Insured"),
                BSL22523 = as.numeric(BSL22523)) %>%
  dplyr::group_by(asAt, group) %>%
  dplyr::summarise(totalNonHH = sum(BSL22523)/1e9) %>%
  dplyr::ungroup()

# Combine the finalNonHH totals with the fcs_base totals into a total
# uninsured amount per as-at date. Columns are renamed by name rather than
# by position, so a change in the order pivot_wider() emits the groups
# cannot silently swap the insured and uninsured columns.
uninsuredBreakdown <- finalNonHH %>%
  pivot_wider(names_from = group, values_from = totalNonHH) %>% 
  rename(Insured_nonHH = Insured, Uninsured_nonHH = Uninsured) %>% 
  left_join(fcs_base, by = c("asAt")) %>% 
  mutate(UninsuredTotal = Uninsured_nonHH + Uninsured)

# Diagnostic (printed, not stored): compare this bottom-up total with the
# ARF 910 based estimate in fcs_shares, in levels and as a share of deposits.
uninsuredBreakdown %>% 
  left_join(fcs_shares, by = c("asAt" = "period")) %>% 
  mutate(diff = UninsuredTotal - UninsuredDepo) %>% 
  mutate(share2 = UninsuredTotal/totalDep) %>% 
  filter(!is.na(diff)) %>% 
  select(asAt, UninsuredTotal, UninsuredDepo, totalDep, diff, share, share2) %>% 
  mutate(diff2 = share2 - share)

uninsuredDeposits <- uninsuredBreakdown %>% 
  select(asAt, UninsuredTotal)

payments <- readrba::read_rba(series_id = "CRTGSVIT")

# Keep the date and value of the series, with dates moved to month-end
paymentsMonthly <- payments |>
  dplyr::transmute(date = rbatools::end_of_month(date),
                   payments = value)

## Join all data -----------------------------------------------------------

# Assemble the estimation data set: every table is left-joined onto the
# APRA SOFIA trade dates, so only those dates appear in the output.
finalData <- APRASOFIACalc |>
  # can change to full join when we have more data
  dplyr::left_join(repoRateSOFIAFinal, by = c("tradeDate" = "month")) |>
  dplyr::left_join(cashRate, by = c("tradeDate" = "month")) |>
  dplyr::left_join(ESBal, by = c("tradeDate" = "month")) |>
  dplyr::left_join(overnightNCOsAndFunding, by = c("tradeDate" = "date")) |>
  dplyr::left_join(omoOutstandingVolume, by = c("tradeDate" = "asAt")) |>
  dplyr::left_join(APRARepoSpecialsUnder10, by = "tradeDate") |>
  dplyr::left_join(depositsLongTimeSeries, by = c("tradeDate" = "period")) |>
  dplyr::left_join(atCallDep, by = c("tradeDate" = "period")) |>
  dplyr::left_join(finalRBASLOutstanding, by = c("tradeDate" = "month")) |>
  dplyr::left_join(foreignDep, by = c("tradeDate" = "month")) |>
  dplyr::left_join(BOBASWFinal, by = c("tradeDate" = "month")) |>
  dplyr::left_join(bondOISFinal, by = c("tradeDate" = "month")) |>
  dplyr::left_join(paymentsMonthly, by = c("tradeDate" = "date"))

# Write the joined data next to the other supplementary material
estimationDataPath <- paste0(dir, "Supplementary info/", "Data for estimation.csv")

write.csv(finalData, file = estimationDataPath)
